{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "因子分析的基本概念:\n"
     ]
    },
    {
     "data": {
      "text/latex": [
       "因子分析模型: $X=\\mu+\\Lambda F+\\epsilon$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "其中$E(F)=0,\\quad E(\\epsilon)=0,\\quad Cov(F)=I_m,\\quad D(\\epsilon)=Cov(\\epsilon)=diag(\\sigma_1^2,\\cdots,\\sigma_m^2),\\quad Cov(F, \\epsilon)=0$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "原始变量$X$的协方差矩阵分解: $Cov(X)=\\Lambda\\Lambda^T+diag(\\sigma_1^2,\\cdots,\\sigma_m^2)$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "载荷因子$\\alpha_{ij}$反映第$i$个变量和第$j$个公共因子的相关系数. 绝对值越大相关的密切程度越高."
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "变量$X_i$的共同度记为$h_i^2=\\sum\\limits_{j=1}^m\\alpha_{ij}^2$, 又有$1=h_i^2+\\sigma_i^2$, 故$h_i^2$越接近1, 因子分析效果越好"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "$\\Lambda$中各列平方和$S_j=\\sum\\limits_{i=1}^p\\alpha_{ij}^2$, 用于衡量$F_j$的相对重要性."
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n",
      "主成分分析法估计载荷因子:\n"
     ]
    },
    {
     "data": {
      "text/latex": [
       "设相关系数矩阵$R$的特征值和对应特征向量分别为: $\\lambda_1\\ge\\lambda_2\\ge\\cdots\\ge\\lambda_p$和$\\eta_1,\\eta_2,\\cdots,\\eta_p$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "设m<p, 则因子载荷矩阵$\\Lambda=[\\sqrt{\\lambda_1}\\eta_1,\\sqrt{\\lambda_2}\\eta_2,\\cdots,\\sqrt{\\lambda_m}\\eta_m]$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "特殊因子的方差用$R-\\Lambda\\Lambda^T$的对角元来估计. 即$\\sigma_i^2=1-\\sum\\limits_{j=1}^m\\alpha_{ij}^2$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "因子载荷矩阵的估计方法: 1.主成分分析法(242页); "
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "通过因子旋转来直观的判断因子的实际意义"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "因子得分: 反过来把公共因子表示为原变量的线性组合."
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "因子得分函数: $F_j=c_j+\\beta_{j1}X_1+\\cdots+\\beta_{jp}X_p,\\ j=1,2,\\cdots,m$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "\n",
      "因子得分:\n"
     ]
    },
    {
     "data": {
      "text/latex": [
       "巴特莱特因子得分估计: $\\hat{F}=(\\Lambda^TD^{-1}\\Lambda)^{-1}\\Lambda^TD^{-1}(X-\\mu)$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/latex": [
       "回归方法因子得分估计: $\\hat{F}=(\\hat{F}_{ij})_{n\\times m}=X_0R^{-1}\\Lambda$"
      ],
      "text/plain": [
       "<IPython.core.display.Latex object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 因子分析模型(241)\n",
    "\n",
    "from IPython.display import Latex\n",
    "from IPython.display import display, Math, Latex\n",
    "print_latex = lambda latex_str: display(Latex(latex_str))\n",
    "\n",
    "print('因子分析的基本概念:')\n",
    "print_latex(r'因子分析模型: $X=\\mu+\\Lambda F+\\epsilon$')\n",
    "print_latex(r'其中$E(F)=0,\\quad E(\\epsilon)=0,\\quad Cov(F)=I_m,\\quad D(\\epsilon)=Cov(\\epsilon)=diag(\\sigma_1^2,\\cdots,\\sigma_m^2),\\quad Cov(F, \\epsilon)=0$')\n",
    "print_latex(r'原始变量$X$的协方差矩阵分解: $Cov(X)=\\Lambda\\Lambda^T+diag(\\sigma_1^2,\\cdots,\\sigma_m^2)$')\n",
    "print_latex(r'载荷因子$\\alpha_{ij}$反映第$i$个变量和第$j$个公共因子的相关系数. 绝对值越大相关的密切程度越高.')\n",
    "print_latex(r'变量$X_i$的共同度记为$h_i^2=\\sum\\limits_{j=1}^m\\alpha_{ij}^2$, 又有$1=h_i^2+\\sigma_i^2$, 故$h_i^2$越接近1, 因子分析效果越好')\n",
    "print_latex(r'$\\Lambda$中各列平方和$S_j=\\sum\\limits_{i=1}^p\\alpha_{ij}^2$, 用于衡量$F_j$的相对重要性.')\n",
    "print('\\n'*3)\n",
    "print('主成分分析法估计载荷因子:')\n",
    "print_latex(r'设相关系数矩阵$R$的特征值和对应特征向量分别为: $\\lambda_1\\ge\\lambda_2\\ge\\cdots\\ge\\lambda_p$和$\\eta_1,\\eta_2,\\cdots,\\eta_p$')\n",
    "print_latex(r'设m<p, 则因子载荷矩阵$\\Lambda=[\\sqrt{\\lambda_1}\\eta_1,\\sqrt{\\lambda_2}\\eta_2,\\cdots,\\sqrt{\\lambda_m}\\eta_m]$')\n",
    "print_latex(r'特殊因子的方差用$R-\\Lambda\\Lambda^T$的对角元来估计. 即$\\sigma_i^2=1-\\sum\\limits_{j=1}^m\\alpha_{ij}^2$')\n",
    "print_latex(r'因子载荷矩阵的估计方法: 1.主成分分析法(242页); ')\n",
    "print_latex(r'通过因子旋转来直观的判断因子的实际意义')\n",
    "print_latex(r'因子得分: 反过来把公共因子表示为原变量的线性组合.')\n",
    "print_latex(r'因子得分函数: $F_j=c_j+\\beta_{j1}X_1+\\cdots+\\beta_{jp}X_p,\\ j=1,2,\\cdots,m$')\n",
    "print('\\n'*3)\n",
    "print('因子得分:')\n",
    "print_latex(r'巴特莱特因子得分估计: $\\hat{F}=(\\Lambda^TD^{-1}\\Lambda)^{-1}\\Lambda^TD^{-1}(X-\\mu)$')\n",
    "print_latex(r'回归方法因子得分估计: $\\hat{F}=(\\hat{F}_{ij})_{n\\times m}=X_0R^{-1}\\Lambda$')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2.85671099 0.80916372 0.53967524 0.4515001  0.34294995]\n",
      "[[ 0.78357657 -0.21619342 -0.44937468  0.25979429 -0.26426783]\n",
      " [ 0.77259486 -0.45813757  0.13090262  0.13873788  0.39600941]\n",
      " [ 0.79468181 -0.23428242  0.24614116 -0.44512148 -0.23425194]\n",
      " [ 0.71234149  0.47285413  0.39725834  0.31715858 -0.10283392]\n",
      " [ 0.71194547  0.52350245 -0.31969123 -0.25697497  0.22547776]]\n",
      "[0.5713422  0.16183274 0.10793505 0.09030002 0.06858999]\n",
      "[0.5713422  0.73317494 0.84110999 0.93141001 1.        ]\n",
      "[0.4326168480911273, 0.6855427113216124, 0.43960060701853165, -0.1851956198795388, -0.23544791397276743]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.decomposition import PCA\n",
    "import scipy\n",
    "import sympy\n",
    "import pandas as pd\n",
    "\n",
    "R = np.array([[1.000, 0.577, 0.509, 0.387, 0.462],\n",
    "              [0.577, 1.000, 0.599, 0.389, 0.322],\n",
    "              [0.509, 0.599, 1.000, 0.436, 0.426],\n",
    "              [0.387, 0.389, 0.436, 1.000, 0.523],\n",
    "              [0.462, 0.322, 0.426, 0.523, 1.000]\n",
    "             ])\n",
    "\n",
    "# 列为单位特征向量. 由于R是对称阵, 因此都是正交的特征向量(下面一行可以验证这一点).\n",
    "# print(np.array([[np.round(np.sum(eigvec[:,i]*eigvec[:,j])) for i in range(R.shape[0])] for j in range(R.shape[1])]))\n",
    "eigval, eigvec = np.linalg.eig(R)\n",
    "order = eigval.argsort()[::-1]\n",
    "eigvec = np.array([eigvec[:, order[i]] for i in range(order.shape[0])]).T\n",
    "eigval = np.sort(eigval)[::-1]\n",
    "eigvec = eigvec*np.sign(np.sum(eigvec, axis=0))\n",
    "# 因子载荷矩阵\n",
    "Lambda = eigvec*np.sqrt(eigval)\n",
    "print(eigval, Lambda, sep='\\n')\n",
    "\n",
    "# 信息贡献率\n",
    "b = np.array([eigval[i]/eigval.sum() for i in range(eigval.shape[0])])\n",
    "print(b)\n",
    "# 累积贡献率\n",
    "alpha = np.array([b[:i+1].sum() for i in range(b.shape[0])])\n",
    "print(alpha)\n",
    "\n",
    "m = 2\n",
    "# 特殊因子方差\n",
    "var_e = [1-Lambda[i, :m].sum() for i in range(Lambda.shape[0])]\n",
    "print(var_e)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1797, 56)\n",
      "(56, 64)\n",
      "(64,)\n",
      "(64,)\n",
      "[ 0.  0.  6. 14. 10.  2.  1.  0. -0.  1. 14. 15. 11. 15.  6.  0.  0.  4.\n",
      " 16.  3.  1. 12.  8.  0. -0.  5. 13.  1.  1.  9.  9. -0.  0.  6.  9.  1.\n",
      "  1. 10.  9.  0.  0.  5. 12.  1.  2. 13.  7.  0.  0.  2. 15.  6. 11. 13.\n",
      "  1.  0. -0.  0.  7. 14. 11.  1.  0.  1.]\n",
      "[ 0.  0.  5. 13.  9.  1.  0.  0.  0.  0. 13. 15. 10. 15.  5.  0.  0.  3.\n",
      " 15.  2.  0. 11.  8.  0.  0.  4. 12.  0.  0.  8.  8.  0.  0.  5.  8.  0.\n",
      "  0.  9.  8.  0.  0.  4. 11.  0.  1. 12.  7.  0.  0.  2. 14.  5. 10. 12.\n",
      "  0.  0.  0.  0.  6. 13. 10.  0.  0.  0.]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "from sklearn.decomposition import FactorAnalysis\n",
    "import numpy as np\n",
    "\n",
    "X, _ = load_digits(return_X_y=True)\n",
    "fa = FactorAnalysis(n_components=56, random_state=0)\n",
    "X_transformed = fa.fit_transform(X)\n",
    "print(X_transformed.shape)\n",
    "print(fa.components_.shape)\n",
    "print(fa.noise_variance_.shape)\n",
    "print(fa.mean_.shape)\n",
    "# 变换的公式满足下面这个:\n",
    "print(np.round(fa.mean_ + np.matmul(X_transformed[0], fa.components_) + fa.noise_variance_, 0))\n",
    "print(X[0])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
